source("cleaning.R")
require(splines)
library(GGally)
daily_data %>% select(-Date, -day, -YDay, -TotalDay) %>% ggpairs()

A<-daily_data %>% 
  group_by(YDay) %>%
  summarize(GHI_max = max(GHI_sum), average_insolation) %>% 
  mutate(winter = ifelse(YDay > 300 | YDay < 100, TRUE, FALSE))
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'YDay'. You can override using the
## `.groups` argument.
A %>% ggplot(aes(x=average_insolation, GHI_max)) + geom_point()

modA <- glm(GHI_max ~ average_insolation, data = A)

modA %>% summary()
## 
## Call:
## glm(formula = GHI_max ~ average_insolation, data = A)
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -84.76605   15.22436  -5.568 2.65e-08 ***
## average_insolation  17.19579    0.03657 470.215  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 9689.359)
## 
##     Null deviance: 2229695106  on 9017  degrees of freedom
## Residual deviance:   87359261  on 9016  degrees of freedom
## AIC: 108370
## 
## Number of Fisher Scoring iterations: 2
detrended <- daily_data %>% 
  mutate(pred = predict(modA,newdata=daily_data)) %>%
  mutate(detrended = (GHI_sum/pred))

detrended %>% ggplot(aes(x=detrended, y=GHI_sum, color = DryBulb_max)) + 
  geom_point()

detrended %>%
  ggplot(aes(x=average_insolation, y=detrended)) +
  geom_point()

detrended %>% pivot_longer(cols = c("RHum_mean","DryBulb_mean","Wspd_mean")) %>%
  ggplot(aes(x=value,y=detrended)) +
    geom_point(alpha=.05) +
    geom_smooth() +
    facet_wrap(~name, scales = "free")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

detrended %>% ggplot(aes(x=factor(Alb_mean), y=detrended, group = factor(Alb_mean))) +
  geom_boxplot()

detrended %>% pivot_longer(cols = c("RHum_mean","DryBulb_mean")) %>%
  ggplot(aes(x=value,y=detrended,color=Wspd_mean)) +
    geom_point(alpha=1) +
    geom_smooth() +
    facet_wrap(~name, scales = "free")+
    scale_color_binned(type = "viridis")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

detrended %>% pivot_longer(cols = c("RHum_mean","Wspd_mean")) %>%
  ggplot(aes(x=value,y=detrended,color=DryBulb_mean)) +
    geom_point(alpha=1) +
    geom_smooth() +
    facet_wrap(~name, scales = "free")+
    scale_color_binned(type = "viridis")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

spaghettimodel <- glm(detrended ~ DryBulb_mean*RHum_mean*Wspd_mean, data =detrended)

spaghettimodel %>% summary() # none of the coefficients with windspeed are significant
## 
## Call:
## glm(formula = detrended ~ DryBulb_mean * RHum_mean * Wspd_mean, 
##     data = detrended)
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                       5.0105569  2.1435041   2.338   0.0194 *
## DryBulb_mean                     -0.1388100  0.0795844  -1.744   0.0812 .
## RHum_mean                        -0.0681065  0.0266940  -2.551   0.0107 *
## Wspd_mean                         0.1001506  0.3886655   0.258   0.7967  
## DryBulb_mean:RHum_mean            0.0023221  0.0009947   2.334   0.0196 *
## DryBulb_mean:Wspd_mean            0.0001917  0.0146381   0.013   0.9895  
## RHum_mean:Wspd_mean               0.0019675  0.0048194   0.408   0.6831  
## DryBulb_mean:RHum_mean:Wspd_mean -0.0001219  0.0001820  -0.670   0.5030  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.02870845)
## 
##     Null deviance: 293.58  on 9017  degrees of freedom
## Residual deviance: 258.66  on 9010  degrees of freedom
## AIC: -6417
## 
## Number of Fisher Scoring iterations: 2
pennemodel <- glm(detrended ~ DryBulb_mean*RHum_mean, data =detrended)

pennemodel %>% summary() # higher AIC than spaghetti
## 
## Call:
## glm(formula = detrended ~ DryBulb_mean * RHum_mean, data = detrended)
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             5.6439443  0.7996973   7.058 1.82e-12 ***
## DryBulb_mean           -0.1395712  0.0299567  -4.659 3.22e-06 ***
## RHum_mean              -0.0562554  0.0099052  -5.679 1.39e-08 ***
## DryBulb_mean:RHum_mean  0.0015991  0.0003723   4.295 1.77e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.02901826)
## 
##     Null deviance: 293.58  on 9017  degrees of freedom
## Residual deviance: 261.57  on 9014  degrees of freedom
## AIC: -6324.2
## 
## Number of Fisher Scoring iterations: 2
raviolimodel <- glm(detrended ~ DryBulb_mean*RHum_mean + Wspd_mean, data = detrended)

raviolimodel %>% summary() # AIC lower than penne but higher than spaghetti
## 
## Call:
## glm(formula = detrended ~ DryBulb_mean * RHum_mean + Wspd_mean, 
##     data = detrended)
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             5.5108927  0.7991551   6.896 5.71e-12 ***
## DryBulb_mean           -0.1358069  0.0299289  -4.538 5.76e-06 ***
## RHum_mean              -0.0565297  0.0098929  -5.714 1.14e-08 ***
## Wspd_mean               0.0044031  0.0009031   4.876 1.10e-06 ***
## DryBulb_mean:RHum_mean  0.0016143  0.0003719   4.341 1.44e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.02894513)
## 
##     Null deviance: 293.58  on 9017  degrees of freedom
## Residual deviance: 260.88  on 9013  degrees of freedom
## AIC: -6346
## 
## Number of Fisher Scoring iterations: 2
detrended %>% mutate(pred = predict(spaghettimodel)) %>%
  mutate(resid = detrended - pred) %>%
  ggplot() +
    geom_point(aes(x=detrended, y=resid))

detrended %>% filter(Date > "2015-01-01") %>%
  ggplot(aes(x=Date, y=detrended)) + 
  geom_point()

why am I assuming the relationship is linear?

detrended  %>%
ggplot(aes(x=RHum_mean, y=detrended)) + 
  geom_boxplot(aes(x=RHum_mean%/%1, y=detrended, group = RHum_mean%/%1), alpha = 0) +
  geom_point(alpha = .1) +
  geom_smooth(formula = y ~ bs(x, knots = c(80), degree = 1))
## `geom_smooth()` using method = 'gam'

detrended  %>%
ggplot(aes(x=DryBulb_mean, y=detrended)) + 
  geom_boxplot(aes(x=DryBulb_mean%/%1, y=detrended, group = DryBulb_mean%/%1), alpha = 0) +
  geom_point(alpha = .1) +
  geom_smooth(formula = y ~ bs(x, knots = c(80), degree = 1))
## `geom_smooth()` using method = 'gam'

pumpkinmodel <- glm(detrended ~ bs(RHum_mean, knots = c(80), degree = 1), data = detrended)

detrended %>% mutate(pred = predict(pumpkinmodel)) %>%
  mutate(resid = detrended - pred) %>%
  ggplot(aes(x=RHum_mean, y=resid)) + 
  geom_point()

detrended %>% mutate(pred = predict(pumpkinmodel)) %>%
  mutate(resid = detrended - pred) %>%
  ggplot(aes(x=detrended)) + 
  geom_point(aes(y=resid), color = "red", alpha = .2)

detrended %>% 
  ggplot(aes(x=YDay, DryBulb_mean, color=ifelse(I(Date > date("2017-01-01")), "red", "blue"))) + 
  geom_point(alpha = .05) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

detrended  %>%
ggplot(aes(x=YDay, y=detrended)) + 
  geom_boxplot(aes(x=YDay, y=detrended, group = YDay%/%10), alpha = 0) +
  geom_point(alpha = .1) +
  geom_smooth(formula = y ~ bs(x, degree = 2))
## `geom_smooth()` using method = 'gam'

detrended  %>%
ggplot(aes(x=YDay, y=GHI_sum)) + 
  geom_boxplot(aes(x=YDay, y=GHI_sum, group = YDay%/%10), alpha = 0) +
  geom_point(alpha = .1, aes(color = RHum_mean)) +
  geom_line(aes(y=pred), color = "red") + 
  geom_smooth(formula = y ~ bs(x, degree = 5)) + scale_color_binned(type = "viridis", n.breaks = 10)
## `geom_smooth()` using method = 'gam'

melonmodel <- glm(detrended ~ bs(YDay, degree = 2, knots = c(90, 300)) + bs(RHum_mean, knots = c(80), degree = 1), data = detrended)
melonmodel %>% summary
## 
## Call:
## glm(formula = detrended ~ bs(YDay, degree = 2, knots = c(90, 
##     300)) + bs(RHum_mean, knots = c(80), degree = 1), data = detrended)
## 
## Coefficients:
##                                            Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                0.899244   0.016241  55.367  < 2e-16
## bs(YDay, degree = 2, knots = c(90, 300))1  0.034649   0.012711   2.726  0.00642
## bs(YDay, degree = 2, knots = c(90, 300))2  0.246895   0.009402  26.260  < 2e-16
## bs(YDay, degree = 2, knots = c(90, 300))3  0.096122   0.011205   8.579  < 2e-16
## bs(YDay, degree = 2, knots = c(90, 300))4  0.090977   0.012779   7.119 1.17e-12
## bs(RHum_mean, knots = c(80), degree = 1)1 -0.164522   0.015182 -10.836  < 2e-16
## bs(RHum_mean, knots = c(80), degree = 1)2 -0.417059   0.013485 -30.927  < 2e-16
##                                              
## (Intercept)                               ***
## bs(YDay, degree = 2, knots = c(90, 300))1 ** 
## bs(YDay, degree = 2, knots = c(90, 300))2 ***
## bs(YDay, degree = 2, knots = c(90, 300))3 ***
## bs(YDay, degree = 2, knots = c(90, 300))4 ***
## bs(RHum_mean, knots = c(80), degree = 1)1 ***
## bs(RHum_mean, knots = c(80), degree = 1)2 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.02619904)
## 
##     Null deviance: 293.58  on 9017  degrees of freedom
## Residual deviance: 236.08  on 9011  degrees of freedom
## AIC: -7242.9
## 
## Number of Fisher Scoring iterations: 2
cowmodel <- glm(GHI_sum ~ bs(YDay, degree = 5), data = daily_data)

# detrended %>% mutate(pred = predict(melonmodel)) %>%
#   mutate(resid = detrended - pred) %>%
#   ggplot(aes(x=Date, y=resid)) + 
#   geom_point(alpha = .1) + 
#   geom_smooth() +
#   geom_hline(yintercept = 0)


detrended %>% mutate(pred = predict(melonmodel)) %>%
  mutate(resid = detrended - pred) %>%
ggplot(aes(x=month(Date), y=resid)) + 
  geom_boxplot(alpha = .1, aes(group = month(Date))) + 
  geom_smooth(formula = y ~ x) +
  geom_hline(yintercept = 0) + 
  geom_point()
## `geom_smooth()` using method = 'gam'

# detrended %>% mutate(pred = predict(melonmodel)) %>%
#   mutate(resid = detrended - pred) %>%
#   ggplot(aes(x=detrended)) + 
#   geom_point(aes(y=resid), color = "red", alpha = .2) 

melon <- detrended %>% mutate(pred = predict(melonmodel)) %>%
  mutate(resid = detrended - pred) 


melon %>% ggplot(aes(x=month(Date), y=pred)) +
  geom_point()+
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

melon %>% ggplot(aes(x=detrended, y=resid)) +
  geom_point()+
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

melonmodel %>% summary()
## 
## Call:
## glm(formula = detrended ~ bs(YDay, degree = 2, knots = c(90, 
##     300)) + bs(RHum_mean, knots = c(80), degree = 1), data = detrended)
## 
## Coefficients:
##                                            Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                0.899244   0.016241  55.367  < 2e-16
## bs(YDay, degree = 2, knots = c(90, 300))1  0.034649   0.012711   2.726  0.00642
## bs(YDay, degree = 2, knots = c(90, 300))2  0.246895   0.009402  26.260  < 2e-16
## bs(YDay, degree = 2, knots = c(90, 300))3  0.096122   0.011205   8.579  < 2e-16
## bs(YDay, degree = 2, knots = c(90, 300))4  0.090977   0.012779   7.119 1.17e-12
## bs(RHum_mean, knots = c(80), degree = 1)1 -0.164522   0.015182 -10.836  < 2e-16
## bs(RHum_mean, knots = c(80), degree = 1)2 -0.417059   0.013485 -30.927  < 2e-16
##                                              
## (Intercept)                               ***
## bs(YDay, degree = 2, knots = c(90, 300))1 ** 
## bs(YDay, degree = 2, knots = c(90, 300))2 ***
## bs(YDay, degree = 2, knots = c(90, 300))3 ***
## bs(YDay, degree = 2, knots = c(90, 300))4 ***
## bs(RHum_mean, knots = c(80), degree = 1)1 ***
## bs(RHum_mean, knots = c(80), degree = 1)2 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.02619904)
## 
##     Null deviance: 293.58  on 9017  degrees of freedom
## Residual deviance: 236.08  on 9011  degrees of freedom
## AIC: -7242.9
## 
## Number of Fisher Scoring iterations: 2
astsa::acf2(melon$detrended)

##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## ACF  0.32 0.17 0.12 0.09 0.09 0.07 0.07 0.09 0.07  0.06  0.05  0.06  0.06  0.04
## PACF 0.32 0.08 0.05 0.04 0.04 0.02 0.03 0.05 0.02  0.01  0.01  0.03  0.02  0.00
##      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
## ACF   0.06  0.07  0.06  0.08  0.06  0.05  0.06  0.06  0.06  0.06  0.04  0.05
## PACF  0.04  0.03  0.01  0.04  0.01  0.00  0.02  0.02  0.02  0.01  0.00  0.01
##      [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
## ACF   0.05  0.05  0.05  0.06  0.06  0.06  0.05  0.04  0.03  0.04  0.04  0.05
## PACF  0.01  0.01  0.02  0.02  0.02  0.01  0.00  0.00  0.00  0.00  0.01  0.02
##      [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
## ACF   0.05  0.06  0.05  0.06  0.05  0.05  0.06  0.06  0.06  0.06  0.03  0.04
## PACF  0.01  0.03  0.00  0.02  0.01  0.01  0.02  0.01  0.01  0.01 -0.01  0.00
##      [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62]
## ACF   0.02  0.04  0.04  0.02  0.02  0.01  0.02  0.01  0.01  0.01  0.02  0.03
## PACF -0.01  0.01  0.00 -0.01  0.00 -0.01  0.00 -0.01 -0.01  0.00  0.00  0.01
##      [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74]
## ACF   0.03  0.02  0.03  0.03  0.02  0.03  0.02  0.01  0.02  0.01  0.00  0.01
## PACF  0.01 -0.01  0.02  0.00 -0.01  0.01 -0.01 -0.01  0.00  0.00 -0.01  0.00
##      [,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86]
## ACF   0.02  0.01 -0.01     0  0.01  0.01  0.01 -0.01  0.01 -0.01 -0.02 -0.01
## PACF  0.01 -0.01 -0.03     0  0.00  0.00  0.00 -0.02  0.02 -0.02 -0.02 -0.01
##      [,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98]
## ACF  -0.02 -0.01     0  0.02  0.03  0.04  0.03  0.00 -0.01 -0.01 -0.03 -0.02
## PACF -0.02  0.00     0  0.02  0.02  0.03  0.00 -0.02 -0.01 -0.01 -0.03 -0.01
##      [,99] [,100] [,101] [,102] [,103] [,104] [,105]
## ACF      0  -0.01  -0.01  -0.02  -0.01  -0.01  -0.01
## PACF     0  -0.01   0.00  -0.01   0.00  -0.01   0.00
astsa::acf2(melon$resid)

##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## ACF  0.23 0.10 0.06 0.04 0.03 0.02 0.03 0.04 0.03  0.02 -0.01     0     0  0.00
## PACF 0.23 0.05 0.03 0.01 0.02 0.01 0.02 0.03 0.01  0.00 -0.02     0     0 -0.01
##      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
## ACF   0.02  0.02  0.00  0.02  0.01  0.00  0.01     0  0.00     0 -0.01  0.00
## PACF  0.02  0.02 -0.01  0.02  0.00 -0.01  0.01     0 -0.01     0 -0.02  0.01
##      [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
## ACF   0.01  0.01  0.02  0.03  0.02  0.02  0.00     0  0.01  0.01  0.02  0.03
## PACF  0.01  0.01  0.01  0.02  0.01  0.01 -0.01     0  0.00  0.00  0.01  0.02
##      [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
## ACF   0.03  0.03  0.01  0.03  0.02  0.03  0.04  0.03  0.03  0.03  0.02  0.03
## PACF  0.01  0.02  0.00  0.02  0.01  0.02  0.02  0.01  0.01  0.01  0.00  0.01
##      [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62]
## ACF   0.00  0.02  0.02  0.01  0.01  0.00  0.01  0.01     0  0.00     0  0.02
## PACF -0.01  0.01  0.01  0.00  0.00 -0.01  0.00  0.00     0 -0.01     0  0.02
##      [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74]
## ACF   0.03  0.01  0.03  0.02  0.01  0.03  0.02  0.01  0.02  0.02  0.02  0.02
## PACF  0.02  0.00  0.03  0.00 -0.01  0.02  0.00  0.00  0.01  0.00  0.00  0.01
##      [,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86]
## ACF   0.02  0.01 -0.01  0.01  0.01  0.01  0.01 -0.01  0.03  0.00 -0.01     0
## PACF  0.01  0.00 -0.02  0.01  0.01  0.00  0.00 -0.01  0.02 -0.01 -0.02     0
##      [,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98]
## ACF   0.00  0.01  0.01  0.03  0.03  0.05  0.04  0.02  0.01  0.01  0.00     0
## PACF -0.01  0.01  0.00  0.03  0.01  0.03  0.01  0.00 -0.01  0.01 -0.02     0
##      [,99] [,100] [,101] [,102] [,103] [,104] [,105]
## ACF   0.02   0.01   0.02   0.02   0.01   0.01   0.01
## PACF  0.02   0.00   0.01   0.00   0.00   0.00   0.00
ar3model <- astsa::sarima(melon$detrended, p = 3,d = 0,q = 0)
## initial  value -1.712313 
## iter   2 value -1.757344
## iter   3 value -1.771756
## iter   4 value -1.771889
## iter   5 value -1.771898
## iter   6 value -1.771898
## iter   6 value -1.771898
## iter   6 value -1.771898
## final  value -1.771898 
## converged
## initial  value -1.771952 
## iter   1 value -1.771952
## final  value -1.771952 
## converged

ar3ma1model <- astsa::sarima(melon$detrended,p=3,d=0,q=1)
## initial  value -1.712313 
## iter   2 value -1.752215
## iter   3 value -1.771605
## iter   4 value -1.771691
## iter   5 value -1.771697
## iter   6 value -1.771698
## iter   7 value -1.771730
## iter   8 value -1.771789
## iter   9 value -1.772020
## iter  10 value -1.772342
## iter  11 value -1.772980
## iter  12 value -1.773161
## iter  13 value -1.773266
## iter  14 value -1.773276
## iter  15 value -1.773933
## iter  16 value -1.774273
## iter  17 value -1.774674
## iter  18 value -1.774772
## iter  19 value -1.775016
## iter  20 value -1.775075
## iter  21 value -1.775120
## iter  22 value -1.775134
## iter  23 value -1.775435
## iter  24 value -1.776057
## iter  25 value -1.776204
## iter  26 value -1.776785
## iter  27 value -1.776944
## iter  28 value -1.777243
## iter  29 value -1.777760
## iter  30 value -1.777768
## iter  31 value -1.777769
## iter  31 value -1.777769
## final  value -1.777769 
## converged
## initial  value -1.778220 
## iter   2 value -1.778220
## iter   3 value -1.778221
## iter   4 value -1.778222
## iter   5 value -1.778222
## iter   6 value -1.778224
## iter   7 value -1.778226
## iter   8 value -1.778227
## iter   9 value -1.778227
## iter   9 value -1.778227
## iter   9 value -1.778227
## final  value -1.778227 
## converged

ar3ma2model <- astsa::sarima(melon$detrended, p = 3,d = 0,q = 2)
## initial  value -1.712313 
## iter   2 value -1.754735
## iter   3 value -1.769031
## iter   4 value -1.770990
## iter   5 value -1.771550
## iter   6 value -1.771561
## iter   7 value -1.771710
## iter   8 value -1.771998
## iter   9 value -1.772600
## iter  10 value -1.773954
## iter  11 value -1.776505
## iter  12 value -1.776551
## iter  13 value -1.776933
## iter  14 value -1.777335
## iter  15 value -1.777463
## iter  16 value -1.777640
## iter  17 value -1.777740
## iter  18 value -1.777748
## iter  19 value -1.777750
## iter  20 value -1.777750
## iter  21 value -1.777751
## iter  22 value -1.777751
## iter  23 value -1.777753
## iter  24 value -1.777755
## iter  25 value -1.777763
## iter  26 value -1.777779
## iter  27 value -1.777814
## iter  28 value -1.777815
## iter  29 value -1.777835
## iter  30 value -1.777853
## iter  31 value -1.777855
## iter  32 value -1.777855
## iter  33 value -1.777855
## iter  34 value -1.777856
## iter  35 value -1.777858
## iter  36 value -1.777863
## iter  37 value -1.777876
## iter  38 value -1.777904
## iter  39 value -1.777939
## iter  40 value -1.777954
## iter  41 value -1.777955
## iter  42 value -1.777956
## iter  43 value -1.777960
## iter  44 value -1.777962
## iter  45 value -1.777962
## iter  45 value -1.777962
## iter  45 value -1.777962
## final  value -1.777962 
## converged
## initial  value -1.778132 
## iter   1 value -1.778132
## final  value -1.778132 
## converged

ar4ma1model <- astsa::sarima(melon$detrended, p = 4,d = 0,q = 1)
## initial  value -1.712307 
## iter   2 value -1.753307
## iter   3 value -1.772278
## iter   4 value -1.772370
## iter   5 value -1.772382
## iter   6 value -1.772383
## iter   7 value -1.772416
## iter   8 value -1.772474
## iter   9 value -1.772696
## iter  10 value -1.773583
## iter  11 value -1.773924
## iter  12 value -1.774464
## iter  13 value -1.774766
## iter  14 value -1.774866
## iter  15 value -1.774917
## iter  16 value -1.775190
## iter  17 value -1.776159
## iter  18 value -1.776225
## iter  19 value -1.776632
## iter  20 value -1.777640
## iter  21 value -1.777750
## iter  22 value -1.777797
## iter  23 value -1.777894
## iter  24 value -1.777902
## iter  25 value -1.777903
## iter  26 value -1.777903
## iter  27 value -1.777903
## iter  28 value -1.777903
## iter  28 value -1.777903
## iter  28 value -1.777903
## final  value -1.777903 
## converged
## initial  value -1.778246 
## iter   2 value -1.778246
## iter   3 value -1.778246
## iter   4 value -1.778247
## iter   4 value -1.778247
## iter   4 value -1.778247
## final  value -1.778247 
## converged

ar4ma2model <- astsa::sarima(melon$detrended, p = 4,d = 0,q = 2)
## initial  value -1.712307 
## iter   2 value -1.755760
## iter   3 value -1.769845
## iter   4 value -1.771863
## iter   5 value -1.772299
## iter   6 value -1.772304
## iter   7 value -1.772315
## iter   8 value -1.772341
## iter   9 value -1.772419
## iter  10 value -1.772678
## iter  11 value -1.773373
## iter  12 value -1.774016
## iter  13 value -1.774438
## iter  14 value -1.774747
## iter  15 value -1.774807
## iter  16 value -1.774955
## iter  17 value -1.775477
## iter  18 value -1.775673
## iter  19 value -1.775882
## iter  20 value -1.776046
## iter  21 value -1.776406
## iter  22 value -1.776963
## iter  23 value -1.777591
## iter  24 value -1.777819
## iter  25 value -1.777836
## iter  26 value -1.777838
## iter  26 value -1.777838
## final  value -1.777838 
## converged
## initial  value -1.778245 
## iter   2 value -1.778245
## iter   3 value -1.778246
## iter   4 value -1.778246
## iter   5 value -1.778246
## iter   6 value -1.778247
## iter   7 value -1.778247
## iter   8 value -1.778248
## iter   9 value -1.778248
## iter   9 value -1.778248
## iter   9 value -1.778248
## final  value -1.778248 
## converged

ar3ma3model <- astsa::sarima(melon$detrended, p = 3,d = 0,q = 3)
## initial  value -1.712313 
## iter   2 value -1.755553
## iter   3 value -1.766910
## iter   4 value -1.771225
## iter   5 value -1.771533
## iter   6 value -1.771771
## iter   7 value -1.772464
## iter   8 value -1.773131
## iter   9 value -1.773259
## iter  10 value -1.773272
## iter  11 value -1.773280
## iter  12 value -1.773297
## iter  13 value -1.773344
## iter  14 value -1.773486
## iter  15 value -1.774065
## iter  16 value -1.774247
## iter  17 value -1.775195
## iter  18 value -1.775979
## iter  19 value -1.776263
## iter  20 value -1.777200
## iter  21 value -1.777253
## iter  22 value -1.777266
## iter  23 value -1.777319
## iter  24 value -1.777392
## iter  25 value -1.777843
## iter  26 value -1.778069
## iter  27 value -1.778112
## iter  28 value -1.778135
## iter  29 value -1.778137
## iter  30 value -1.778138
## iter  31 value -1.778138
## iter  31 value -1.778138
## final  value -1.778138 
## converged
## initial  value -1.778251 
## iter   2 value -1.778251
## iter   3 value -1.778251
## iter   4 value -1.778251
## iter   5 value -1.778251
## iter   6 value -1.778251
## iter   7 value -1.778251
## iter   7 value -1.778251
## iter   7 value -1.778251
## final  value -1.778251 
## converged

ar3model$AIC
## [1] -0.7049186
ar3ma1model$AIC
## [1] -0.7172468
ar3ma2model$AIC
## [1] -0.7168336
ar4ma1model$AIC
## [1] -0.717064
ar4ma2model$AIC
## [1] -0.7168451
ar3ma3model$AIC
## [1] -0.7168513
cbind(melon, armaresid = ar3ma1model$fit$residuals) %>%
  ggplot(aes(x=month(Date), y=resid)) +
    geom_boxplot(aes(group=month(Date))) +
    geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

cbind(melon, armaresid = ar3ma1model$fit$residuals) %>% 
  ggplot(aes(x=Date, y = armaresid)) + 
  geom_boxplot(aes(group = year(Date))) +
  geom_smooth(aes(x=Date, color=ifelse(I(Date > date("2017-01-01")), "red", "blue")), method = "lm") +
  geom_vline(xintercept = date("2017-01-01"), color = "red")
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## `geom_smooth()` using formula = 'y ~ x'

detrended %>% ggplot(aes(x=Date, y=detrended)) + 
  geom_boxplot(aes(group = year(Date))) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

detrended %>%
  ggplot(aes(x=RHum_mean,y=detrended,color=DryBulb_mean%/%1)) +
    geom_point(alpha=.5) +
    geom_smooth(aes(group = DryBulb_mean %/% 1)) +
  geom_smooth(color="red")+
  scale_color_binned(type = "viridis")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

detrended %>%
  ggplot(aes(x=DryBulb_mean,y=detrended,color=RHum_mean%/%5)) +
    geom_point(alpha=.5) +
    geom_smooth(aes(group = RHum_mean %/% 5)) +
    geom_smooth(color="red")+
  scale_color_binned(type = "viridis")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

detrended %>% 
  ggplot(aes(x=RHum_mean,y=detrended,color=YDay)) +
    geom_point(alpha=.5) +
    geom_smooth(method = "lm") +
  facet_wrap(~YDay%/%50)+
    scale_color_binned(type = "viridis", n.breaks = 10)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

maybe focusing on mean values is wrong?

detrended %>% select(-Date, -day, -YDay, -TotalDay) %>% ggpairs(aes(alpha = .001))

detrended %>%
  ggplot(aes(x=RHum_max,y=detrended,color=DryBulb_mean%/%1)) +
    geom_point(alpha=.5) +
    geom_smooth(aes(group = DryBulb_mean %/% 1)) +
    geom_smooth(color="red")+
    scale_color_binned(type = "viridis")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# this looks like the exact same
pheasantmodel <- glm(detrended ~ RHum_mean, data = detrended)

pheasantmodel %>% summary() 
## 
## Call:
## glm(formula = detrended ~ RHum_mean, data = detrended)
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.8202588  0.0310349   58.65   <2e-16 ***
## RHum_mean   -0.0123874  0.0003813  -32.49   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.02914933)
## 
##     Null deviance: 293.58  on 9017  degrees of freedom
## Residual deviance: 262.81  on 9016  degrees of freedom
## AIC: -6285.6
## 
## Number of Fisher Scoring iterations: 2
pheasant <- detrended %>% 
  mutate(pred = predict(pheasantmodel)) %>%
  mutate(residuals = detrended - pred)

pheasant %>% ggplot(aes(x=average_insolation)) + 
  geom_point(aes(y=residuals))

pheasant %>% ggplot(aes(x=detrended)) + 
  geom_point(aes(y=residuals)) +
  geom_point(aes(y=detrended), size = .25, color = "green") + 
  geom_smooth(aes(y=residuals))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

big oof

pheasant %>%
  ggplot(aes(x=RHum_mean, y=residuals, color=DryBulb_max))+
  geom_point()

data %>% ggplot(aes(x=`RHum (%)`, y=`GHI (W/m^2)`)) +
  geom_boxplot(aes(group=`RHum (%)`))